/*******************************************************************************

Manyara make_ViltoAV.do

2018.06.18 (DJ) Created

This do file:
(i) constructs all villages-all agrovets pair with distances, 
(ii) calculates travel-adjusted input prices from each village to each agrovet. 
(iii) for each village, it finds the minimum travel-adj input prices
(iv) collapses the village-agrovet level data into village-level data 
	 with best travel-adjusted input prices. 

The output data set is the village-level <Manyara ViltoAV.dta>.

*******************************************************************************/


	
	
/*******************************************************************************
*******************************************************************************

	Part 1. Construct data sets with the Missing Census Agrovets

*******************************************************************************
*******************************************************************************/

	
local file ViltoAV_censusappended
local data agrovet_shop_level_censusappended
	
	** Step (1): make a pair for each village-agrovet shop
		use "${analysis}/Manyara `data'.dta", clear	
		keep av_village_id av_ownerid av_shopid av_district av_ward av_village ///
			 av_gps_lat av_gps_long sellfert sellseeds av_*price* av_*qty* av_*rev* dist_border_km av_urea_17_median_imput
		gen to_merge = 1
		
		tempfile avshoplist
		save `avshoplist'
		
		use "${analysis}/Manyara Vil_distance_to_borders.dta", replace
		rename(district ward village_name) (survey_district survey_ward survey_village) 
		tempfile distborder
		save `distborder'
		
		use "${analysis}/Manyara village_GPS.dta", replace
		keep village_id survey_district survey_ward survey_village village_lat village_lon
		merge 1:1 survey_district survey_ward survey_village using `distborder'
		drop _merge
		gen to_merge = 1
		tempfile villist
		save `villist'
		
		joinby to_merge using `avshoplist'
		drop to_merge
		
		gen has_agrovet_village = 0
		replace has_agrovet_village = 1 if village_id == av_village_id
			
		// calculate distances from each village to each agrovet using the routing method, ViltoVil_KMCOST_share.dta
			foreach var in survey_district survey_ward survey_village av_district av_ward av_village {
				replace `var' = subinstr(`var', "_", " ", .)
			}	
			tempfile vilavpair
			save `vilavpair'
		
		
		** Bring in travel costs directly measured from Transport Surveys
			use `ViltoMkt_KMCOST', clear
			keep if survey_region_O == "MANYARA" & survey_region_D == "MANYARA"
			rename (survey_district_O survey_ward_O survey_village_O survey_district_D survey_ward_D survey_village_D) ///
				   (survey_district survey_ward survey_village av_district av_ward av_village)
			keep survey_district survey_ward survey_village av_district av_ward av_village DIST_*
			foreach var in survey_district survey_ward survey_village av_district av_ward av_village {
				replace `var' = subinstr(`var', "_", " ", .)
			}	
			tempfile measuredcost
			save `measuredcost'
			
			use `vilavpair'
			merge m:1 survey_district survey_ward survey_village av_district av_ward av_village using `measuredcost'
			drop if _merge==2
			drop _merge	
			rename DIST_km_direct google_vil_AVvil_km
		
		
		
		** Fertilizer
			* Define the type of fertilizer we will be using
			global type urea
			global costperkm 138 // 0.06*2300 comes from Table 2 NEW. <--- *************** This is obsolete ***************
			global numtripeqv 2.6969 // 1trip going + 1trip coming + 0.6969 carrying. 
									// 0.6969 comes from the coefficients of hours in col(2) and (4) in Appendix Table 2. 
			
			//global viltovilkm DIST_km_direct
			global viltovilkm google_vil_AVvil_km
			
			* (1) Using Market Transport surveys directly and calibrated carrying cost
			gen travel_adj_price_50kg	=	av_${type}_rprice_17 + ${viltovilkm}*${costperkm}*${numtripeqv}
			
			* (1.1) Cut the Transportation Costs in (1) by half for Alan's counterfactuals
			gen travel_adj_price_50kgM1	=	av_${type}_rprice_17 + ${viltovilkm}*${costperkm}*${numtripeqv}/2
				
			* (2) Using Alan's estimation from a trade model: Overall AVE for each bin
			gen AVE_adj_price_50kg	   = av_${type}_rprice_17 if ${viltovilkm}==0
			replace AVE_adj_price_50kg = av_${type}_rprice_17*(1+0.20) if ${viltovilkm}>0 & ${viltovilkm}<=5
			replace AVE_adj_price_50kg = av_${type}_rprice_17*(1+0.47) if ${viltovilkm}>5 & ${viltovilkm}<=10
			replace AVE_adj_price_50kg = av_${type}_rprice_17*(1+0.78) if ${viltovilkm}>10 & ${viltovilkm}<=15
			replace AVE_adj_price_50kg = av_${type}_rprice_17*(1+1.05) if ${viltovilkm}>15 & ${viltovilkm}<=20
			replace AVE_adj_price_50kg = av_${type}_rprice_17*(1+1.19) if ${viltovilkm}>20 & ${viltovilkm}<=30
			replace AVE_adj_price_50kg = av_${type}_rprice_17*(1+1.76) if ${viltovilkm}>30 & ${viltovilkm}<=40
			replace AVE_adj_price_50kg = av_${type}_rprice_17*(1+2.18) if ${viltovilkm}>40 & ${viltovilkm}<=50
			replace AVE_adj_price_50kg = av_${type}_rprice_17*(1+3.12) if ${viltovilkm}>50 & ${viltovilkm}<=100
			replace AVE_adj_price_50kg = av_${type}_rprice_17*(1+5.69) if ${viltovilkm}>100 & !mi(${viltovilkm})
			
			* (2.1) Cut the Transportation Costs in (2) by half for Alan's counterfactuals
			gen AVE_adj_price_50kgM1		=	av_${type}_rprice_17 + av_${type}_rprice_17*0.04*${viltovilkm}/2
			
			* (2) Using Alan's estimation from a trade model: 4% AVE per KM
			gen AVE2_adj_price_50kg		=	av_${type}_rprice_17 + av_${type}_rprice_17*0.02*${viltovilkm}
			
			* (2.1) Cut the Transportation Costs in (2) by half for Alan's counterfactuals
			gen AVE2_adj_price_50kgM1		=	av_${type}_rprice_17 + av_${type}_rprice_17*0.02*${viltovilkm}/2
			
			* (3) Using Directly Measured Transport Surveys 
			gen DR_adj_price_50kg		=	av_${type}_rprice_17 + DIST_cost_USD_direct*2300*${numtripeqv}
	
			* (3.1) Cut the Transportation Costs in (3) by half for Alan's counterfactuals
			gen DR_adj_price_50kgM1		=	av_${type}_rprice_17 + DIST_cost_USD_direct*2300*${numtripeqv}/2
	
			* (4) Using Directly Measured Transport Surveys + Imputed with Median Prices (IM) for CDFs
			gen IM_adj_price_50kg		=	av_${type}_17_median_imput + DIST_cost_USD_direct*2300*${numtripeqv}
		
			* (4.1) Cut the Transportation Costs in (4) by half for Alan's counterfactuals
			gen IM_adj_price_50kgM1		=	av_${type}_17_median_imput + DIST_cost_USD_direct*2300*${numtripeqv}/2
	
			* (5) After dropping some agrovets near borders
			foreach dist in 3 5 10 15{
				gen DR`dist'_adj_price_50kg = av_${type}_rprice_17 + DIST_cost_USD_direct*2300*${numtripeqv} if dist_border_km>`dist'
			}
			
			* (6) Prices from the nearest shops
				// Nearest 1st and 2nd Agrovet Shop (i.e. 2nd village is allowed to be the same as the 1st village)
					gen mi_price_or_km = (mi(av_urea_rprice_17)|mi(${viltovilkm}))
					
					// rank by distance AND prices
					bys mi_price_or_km village_id (${viltovilkm} av_urea_rprice_17): gen rank_to_anyav = _n if mi_price_or_km==0
					gen ${viltovilkm}_avrank1= ${viltovilkm} if rank_to_anyav==1
					gen ${viltovilkm}_avrank2= ${viltovilkm} if rank_to_anyav==2
				
				// Nearest 1st and 2nd Village for FERTILIZER (i.e. 2nd village has to be different from the 1st village)
					egen tag = tag(village_id av_district av_ward av_village)
					egen villagepair_id = group(village_id av_district av_ward av_village)
					
					// create the rank at the village-AV village level and fill down
					bys mi_price_or_km village_id: egen tempvar = rank(${viltovilkm}) if tag & mi_price_or_km==0, unique
					bys mi_price_or_km village_id villagepair_id: egen rank_to_anyvil = total(tempvar), m
					
					// distances to the nearest and second nearest village
					gen ${viltovilkm}_rank1= ${viltovilkm} if rank_to_anyvil==1
					gen ${viltovilkm}_rank2= ${viltovilkm} if rank_to_anyvil==2
					
					
				// Nearest 1st and 2nd village for SEEDS
					cap drop tempvar
					bys village_id : egen tempvar = rank(${viltovilkm}) if tag & sellseeds==1, unique
					bys village_id villagepair_id: egen rank_to_av_seeds = total(tempvar), m
					
					// distances to the nearest and second nearest village with SEEDS
					gen ${viltovilkm}_rank1_seed= ${viltovilkm} if rank_to_av_seeds==1
					gen ${viltovilkm}_rank2_seed= ${viltovilkm} if rank_to_av_seeds==2
					
					
				order village_id
				sort village_id ${viltovilkm}
				
				// Construct the travel-adj price from the nearest agro-input sellers
				gen NEAR_adj_price_50kg = av_${type}_rprice_17 + DIST_cost_USD_direct*2300*${numtripeqv} if rank_to_anyav==1
				
				sum NEAR_adj_price_50kg DR_adj_price_50kg 
				
				
		tempfile vil_av_long
		save `vil_av_long'
		

		
	** Step (3):  For each village, pick the minimum travel-cost adjusted prices 
		
		// minimum travel-cost adjusted prices among all agrovets with positive prices
		foreach var in "50kg" "50kgM1"{
		
			// BASIC TRAVEL COST
				// pick the minimum price
				bys village_id: egen min_travel_adj_price_`var'	= min(travel_adj_price_`var')
				bys village_id: egen min_travel_adj_price_`var'_N	= count(travel_adj_price_`var')
				
				// distance, travel costs, and unadjusted price to the best agrovet
				gen km_travel_adj_`var' 		= ${viltovilkm} 								if min_travel_adj_price_`var' == travel_adj_price_`var' 	& !mi(travel_adj_price_`var')
				gen tripcost_travel_adj_`var' 	= ${viltovilkm}*${costperkm}				 	if min_travel_adj_price_`var' == travel_adj_price_`var' 	& !mi(travel_adj_price_`var')
				gen unadjusted_price_`var'  	= av_${type}_rprice_17 								if min_travel_adj_price_`var' == travel_adj_price_`var' 	& !mi(travel_adj_price_`var')
		}
		
		
		foreach var in "50kg"{
		
			// OTHER METHODS OF COMPUTING TRANSPORT COSTS
				// DIRECT TRANSPORT COSTS
				foreach method in NEAR DR AVE AVE2 DR3 DR5 DR10 DR15{
					// pick the minimum price
					bys village_id: egen min_`method'_adj_price_`var'	= min(`method'_adj_price_`var')
					bys village_id: egen min_`method'_adj_price_`var'_N	= count(`method'_adj_price_`var')
					
					// distance, travel costs, and unadjusted price to the best agrovet
					gen km_`method'_adj_`var'		    = ${viltovilkm} 			if min_`method'_adj_price_`var' == `method'_adj_price_`var' & !mi(`method'_adj_price_`var')
					gen tripcost_`method'_adj_`var' 	= DIST_cost_USD_direct*2300	if min_`method'_adj_price_`var' == `method'_adj_price_`var' & !mi(`method'_adj_price_`var')
					gen unadjusted_price_`var'_`method' = av_${type}_rprice_17 		if min_`method'_adj_price_`var' == `method'_adj_price_`var' & !mi(`method'_adj_price_`var')
				}	
				
				// DIRECT TRANSPORT COSTS + IMPUTED MEDIAN PRICE FOR MISSING AGROVETS
				foreach method in IM{
					// pick the minimum price
					bys village_id: egen min_`method'_adj_price_`var'	= min(`method'_adj_price_`var')
					bys village_id: egen min_`method'_adj_price_`var'_N	= count(`method'_adj_price_`var')
					
					// distance, travel costs, and unadjusted price to the best agrovet
					gen km_`method'_adj_`var'		    = ${viltovilkm} 			if min_`method'_adj_price_`var' == `method'_adj_price_`var' & !mi(`method'_adj_price_`var')
					gen tripcost_`method'_adj_`var' 	= DIST_cost_USD_direct*2300		if min_`method'_adj_price_`var' == `method'_adj_price_`var' & !mi(`method'_adj_price_`var')
					gen unadjusted_price_`var'_`method' = av_${type}_17_median_imput	if min_`method'_adj_price_`var' == `method'_adj_price_`var' & !mi(`method'_adj_price_`var')
				}
				
				/*
								
				. fsum min_*50kg
				
								  Variable |        N     Mean       SD      Min      Max                                                                                                                              
				---------------------------+---------------------------------------------
				 min_travel_adj_price_50kg |    55796 59102.57 11859.32 40000.00 1.09e+05  
					 min_DR_adj_price_50kg |    62640 62837.79 14650.39 40000.00 1.98e+05  
					 min_IM_adj_price_50kg |    62640 62837.79 14650.39 40000.00 1.98e+05  
				*/
				
		}
		
		
		
		// minimum prices among agrovets within restricted distances (within 10km and 20km)
		pause off
		foreach var in "50kg" "50kgM1"{
			
			foreach type in travel_adj DR_adj IM_adj AVE_adj AVE2_adj{
				foreach dist in 10 20{
					tempvar `type'`var'`dist'
					gen ``type'`var'`dist''= `type'_price_`var' if ${viltovilkm}<=`dist'
					bys village_id: egen min`dist'_`type'_price_`var' = min(``type'`var'`dist'')
					bys village_id: egen min`dist'_`type'_price_`var'_N = count(``type'`var'`dist'')
					
					// distances to the agrovets with minimum travel-adj prices
					gen km_`type'`dist'_`var' = ${viltovilkm} if min`dist'_`type'_price_`var' == ``type'`var'`dist'' & !mi( ``type'`var'`dist'')
				}
			}
		}	
		
		// number of agrovets within X km that sell seeds, fertilizers, or either
			pause on
			foreach dist in 5 10 20{
				tempvar fert seed fertseed

				// number of AV that sell seeds
				gen `seed' = (sellseeds==1 & ${viltovilkm}<=`dist') if !mi(${viltovilkm})&!mi(sellseeds)
				bys village_id: egen num_av_seed_less_`dist'km = total(`seed'), m
	
				// number of AV that sell fertilizer
				gen `fert'= (sellfert==1 & ${viltovilkm}<=`dist')  if !mi(${viltovilkm})&!mi(sellfert)
				bys village_id: egen num_av_fert_less_`dist'km = total(`fert'), m
				
				// number of AV that sell either seed or fertilizer
				gen `fertseed' = ((sellfert==1|sellseeds==1) & ${viltovilkm}<=`dist') if !mi(${viltovilkm})&!mi(sellseeds)&!mi(sellfert)
				bys village_id: egen num_av_fertseed_less_`dist'km = total(`fertseed'), m
				
			}
			
			gen vil_to_any${type}_av_km = ${viltovilkm} if !mi(av_${type}_rprice_17)
			gen vil_to_anyseed_av_km 	= ${viltovilkm} if !mi(av_seed_rprice_18)
			
		
			
		// collapse to the village level
		collapse (min) vil_to_anyav_km_AV1 = ${viltovilkm}_avrank1 vil_to_anyav_km_AV2 = ${viltovilkm}_avrank2 ///
					   vil_to_anyav_km_r1 = ${viltovilkm}_rank1 vil_to_anyav_km_r2 = ${viltovilkm}_rank2 ///
					   vil_to_anyav_km_r1_seed = ${viltovilkm}_rank1_seed vil_to_anyav_km_r2_seed = ${viltovilkm}_rank2_seed ///
					   vil_to_anyav_km = ${viltovilkm} ///
					   vil_to_any${type}_av_km vil_to_anyseed_av_km ///
				 (mean) min* km_* tripcost_* num_av_* unadjusted_price_* ///
				 (max) has_agrovet_village, by(village_id village_lat village_lon survey_district survey_ward survey_village)
			
		// has any agrovet within X km (dummy variable)
		foreach km in 5 10 20{
			gen has_av_fertseed_less_`km'km = (num_av_fertseed_less_`km'km>0) if !mi(num_av_fertseed_less_`km'km)
			gen has_av_fert_less_`km'km = (num_av_fert_less_`km'km>0) if !mi(num_av_fert_less_`km'km)
			gen has_av_seed_less_`km'km = (num_av_seed_less_`km'km>0) if !mi(num_av_seed_less_`km'km)
		}
		
		
	// convert to dollars
		foreach var of varlist min*price_50kg* unadjusted_price_* {
			replace `var' = `var'/2300
		}
		
	// generate other variables
		gen onewaycost_bestadj_av_${type}_usd = tripcost_travel_adj_50kg/2300
		gen travelcost_bestadj_av_${type}_usd = ${numtripeqv}*tripcost_travel_adj_50kg/2300
		
		foreach var in NEAR DR DR3 DR5 DR10 DR15{
			gen onewaycost_`var'_adj_av_${type}_usd = tripcost_`var'_adj_50kg/2300
			gen travelcost_`var'_adj_av_${type}_usd = ${numtripeqv}*tripcost_`var'_adj_50kg/2300
		}
		
	
	// winsorize
		foreach var of varlist vil_to_any*av_km*{
			winsor2 `var', cut(1 99) suffix(_w1) label
		}	
		fsum km_*
		 
	compress 
	
	tempfile manyaraViltoAV
	save `manyaraViltoAV'
